knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Load libraries
library(tidyverse)
library(cowplot)
library(ggplot2)
library(ggpattern)
library(ggsci)
library(utils)
library(reshape2)
library(stringr)
library(gridGraphics)
library(zellkonverter)
library(SingleCellExperiment)
library(cytomapper)
library(scater)
library(ggpubr)
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis"
# Set path to masks
masks_tonsil.path <- "/mnt/bb_dqbm_volume/data/Tonsil_th152/steinbock/masks_deepcell"
#masks_lung.path <- "/mnt/bb_dqbm_volume/data/Tonsil_th152/steinbock/masks_deepcell"
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")
Next, we look at U using different training datasets. For this, we read in the results from CISI training on the Tonsil th152 and the Immucan lung dataset (tonsil_vs_lung.py).
## Read results
# Read in all results for tonsil and lung comparison into one dataframe
tonsil_vs_lung.files <- list.files(analysis.path, 'simulation_results.txt',
full.names=TRUE, recursive=TRUE)
# TODO: change this line if more files come along and need to exclude
tonsil_vs_lung.files <- tonsil_vs_lung.files[grepl(
"tonsil_vs_lung|Tonsil_th152/training/full|Immucan_lung/training/subset", tonsil_vs_lung.files)]
tonsil_vs_lung.res <- lapply(tonsil_vs_lung.files, read_results, type="u") %>%
bind_rows()
# Get information about best U files
tonsil_vs_lung.u_files <- tonsil_vs_lung.res %>%
dplyr::filter(dataset==training) %>%
dplyr::select(k, training, `Best crossvalidation fold`, datasize) %>%
distinct()
# Harmonize all dataset names
patterns <- unique(unlist(lapply(tonsil_vs_lung.res$training, function(name){
if(length(str_split(name, "_")[[1]])==1){
name
}
})))
tonsil_vs_lung.res$training <- unlist(lapply(tonsil_vs_lung.res$training, function(pat){
patterns[str_detect(pat, fixed(patterns, ignore_case=TRUE))]
}))
tonsil_vs_lung.res$dataset <- unlist(lapply(tonsil_vs_lung.res$dataset, function(pat){
patterns[str_detect(pat, fixed(patterns, ignore_case=TRUE))]
}))
## Read U
tonsil_vs_lung.u <- lapply(1:(nrow(tonsil_vs_lung.u_files)), function(i){
file <- file.path(analysis.path, tonsil_vs_lung.u_files[i, "training"], "training",
tonsil_vs_lung.u_files[i, "datasize"],
paste0("k_", tonsil_vs_lung.u_files[i, "k"]),
paste0("crossvalidation_", tonsil_vs_lung.u_files[i, "Best crossvalidation fold"]),
"gene_modules.csv")
u_temp <- read_U(file, "training")
}) %>% bind_rows() %>%
dplyr::rename(dataset=rep)
## Read X_test and X_simulated
# Specify X_test and X_simulated files to be read in
tonsil_vs_lung_X.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)
# TODO: change this line if more files come along and need to exclude
tonsil_vs_lung_X.files <- tonsil_vs_lung_X.files[grepl(
"tonsil_vs_lung|Tonsil_th152/training/full|Immucan_lung/training/subset", tonsil_vs_lung_X.files)]
# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. which dataset is used in training and testing, which k was used and if it
# is the ground truth or simulated X)
# Remove neg. values and transform counts?
tonsil_vs_lung_X.list <- lapply(tonsil_vs_lung_X.files, read_results, type="x")
tonsil_vs_lung_X.list <- lapply(tonsil_vs_lung_X.list, function(sce.temp){
pat <- metadata(sce.temp)
metadata(sce.temp)$dataset <- patterns[str_detect(pat$dataset,
fixed(patterns, ignore_case=TRUE))]
metadata(sce.temp)$training <- patterns[str_detect(pat$training,
fixed(patterns, ignore_case=TRUE))]
# TODO: Remove negative values? Add transformed counts?
# counts(sce.temp) <- pmax(counts(sce.temp), 0)
assay.name <- names(assays(sce.temp))
for (a in assay.name[-1]) {
assay(sce.temp, a) <- NULL
}
assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
sce.temp
})
## Read masks
# Read in masks for tonsil data
masks.tonsil <- loadImages(masks_tonsil.path, as.is = TRUE)
mcols(masks.tonsil) <- DataFrame(sample_id=names(masks.tonsil))
# For each different measurement of training results, plot a barplot comparing
# diff. k-sparsities, simulation types and training-test set combinations
for (measure in names(tonsil_vs_lung.res)[2:10]) {
cat('##### ', measure, '\n')
# Reorder dataframe for plotting
data_temp <- tonsil_vs_lung.res %>%
dplyr::select(dataset, training, simulation, k, !!measure) %>%
mutate(group=paste(training, dataset, sep="_"))
# Create barplot
tonsil_vs_lung.barplot <- ggplot(data_temp, aes(x=group, y=!!sym(measure), fill=k, pattern=simulation)) +
geom_bar_pattern(stat="identity",
position=position_dodge(preserve="single"),
width=0.6,
color="black",
pattern_fill="black",
pattern_angle=45,
pattern_density=0.3,
pattern_spacing=0.01,
pattern_key_scale_factor=0.6) +
scale_y_continuous(limits=c(0, 1)) +
scale_fill_npg() +
scale_pattern_manual(values=c(composite="stripe", noisy="none"),
labels=c("Composite", "Noisy")) +
labs(x="Training_Test Dataset", y=str_to_title(measure), pattern="Simulation Type") +
guides(pattern=guide_legend(override.aes=list(fill="white")),
fill=guide_legend(override.aes=list(pattern="none"))) +
theme_cowplot()
print(tonsil_vs_lung.barplot)
cat("\n\n")
}
tonsil_vs_lung.cor <- plot_U(tonsil_vs_lung.u, "k", "dataset")
plot_U_membership(tonsil_vs_lung.u, "k", "dataset")
tonsil_vs_lung.mantel <- lapply(tonsil_vs_lung.cor, function(l){
mantel_test(l[[1]], l[[2]])$mantel_r
}) %>%
as.data.frame(col.names=names(tonsil_vs_lung.cor), check.names=FALSE)
knitr::kable(tonsil_vs_lung.mantel, digits=2,
col.names=paste0("k = ", names(tonsil_vs_lung.mantel)))
| k = 1 | k = 3 |
|---|---|
| 0.67 | 0.42 |
tonsil_vs_lung_X.plotList <- keep(tonsil_vs_lung_X.list, function(x){
metadata(x)$training=="tonsil" & metadata(x)$dataset=="tonsil" & metadata(x)$k=="1"})
names(tonsil_vs_lung_X.plotList) <- lapply(tonsil_vs_lung_X.plotList,
function(x){metadata(x)$ground_truth})
tonsil_vs_lung_X.imgList <- plot_cells(tonsil_vs_lung_X.plotList, masks.tonsil,
"CD20")
tonsil_vs_lung_X.img <- plot_grid(plotlist=append(tonsil_vs_lung_X.imgList[grepl("20220520_TsH_th152_cisi1_002",
names(tonsil_vs_lung_X.imgList))],
tonsil_vs_lung_X.imgList[grepl("legend",
names(tonsil_vs_lung_X.imgList))]),
ncol=2, labels=names(tonsil_vs_lung_X.plotList),
label_size=15, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
tonsil_vs_lung_X.img
tonsil_vs_lung_X.plotListRenamed <- lapply(names(tonsil_vs_lung_X.plotList), function(n){
rownames(tonsil_vs_lung_X.plotList[[n]]) <- paste(rownames(tonsil_vs_lung_X.plotList[[n]]),
n, sep="\n")
tonsil_vs_lung_X.plotList[[n]]
})
names(tonsil_vs_lung_X.plotListRenamed) <- names(tonsil_vs_lung_X.plotList)
tonsil_vs_lung_X.overlayed <- do.call("rbind", tonsil_vs_lung_X.plotListRenamed)
pois <- c("CD20\nsimulated", "CD20\nground_truth")
tonsil_vs_lung_X.imgDiff <- plotCells(mask=masks.tonsil[unique(colData(tonsil_vs_lung_X.overlayed)$sample_id)],
object=tonsil_vs_lung_X.overlayed,
cell_id="ObjectNumber", img_id="sample_id", colour_by=pois,
return_plot=TRUE, image_title=list(cex=1),
scale_bar=list(cex=1, lwidth=5),
legend=list(colour_by.title.cex=0.7, colour_by.labels.cex=0.7))
ggdraw(tonsil_vs_lung_X.imgDiff$plot)
Plot of asinh transformed counts coloured by defined celltype.
tonsil_vs_lung_X.exprsList <- keep(tonsil_vs_lung_X.list, function(x){
metadata(x)$training=="lung" & metadata(x)$dataset=="lung" & metadata(x)$k=="1"})
names(tonsil_vs_lung_X.exprsList) <- lapply(tonsil_vs_lung_X.exprsList,
function(x){metadata(x)$ground_truth})
tonsil_vs_lung_X.Exprs <- plot_grid(plot_exprs(tonsil_vs_lung_X.exprsList, "B", "CD3", "CD20"),
plot_exprs(tonsil_vs_lung_X.exprsList, "BnT", "CD3", "CD20"),
plot_exprs(tonsil_vs_lung_X.exprsList, "Neutro", "MPO", "CD15"),
ncol=1)
print(tonsil_vs_lung_X.Exprs)
For k=1.
tonsil_vs_lung_X.df <- lapply(tonsil_vs_lung_X.list, function(sce){
counts.long <- as.data.frame(counts(sce)) %>%
mutate(protein=rownames(.)) %>%
melt() %>%
dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
cell=variable) %>%
mutate(k=metadata(sce)$k,
dataset=paste(metadata(sce)$training, metadata(sce)$dataset, sep="_"))
}) %>% bind_rows() %>%
group_by(protein, cell, k, dataset) %>%
summarise_all(na.omit)
for (i in unique(tonsil_vs_lung_X.df$dataset)) {
cat('#####', i, '\n')
tonsil_vs_lung.proteinPlot <- ggscatter(tonsil_vs_lung_X.df %>%
dplyr::filter(k=="1" & dataset==i),
x="ground_truth", y="simulated",
add="reg.line",
color=pal_npg("nrc")("1"),
add.params=list(color=pal_npg("nrc")("4")[4],
size=2)) +
facet_wrap(~ protein, scales="free", ncol=5) +
theme_cowplot(10) +
geom_abline(slope=1, linetype="dashed") +
stat_cor(size=2)
print(tonsil_vs_lung.proteinPlot)
cat('\n\n')
}